require("dplyr")

## set the working directory to the
## Dataverse package folder

## for study 2521 we only use the public survey,
## not the leaders' survey included in the same
## folder

## read the public survey file for study 2521 from the local data folder
## (the path is relative to the working directory)
## NOTE(review): the ".tab" extension suggests a tab-delimited file, but the
## default whitespace separator is used here -- confirm no field contains
## spaces or is empty
survey <- read.table(
  file = "Harris_Data/Harris 1976 Business Survey, study no. 2521/harris_s2521_public_spss.tab",
  header = TRUE
)


# respondent id (pid): running row number within this study
# seq_len() is the safe idiom here; 1:nrow() would yield c(1, 0) on an
# empty table
survey$pid <- seq_len(nrow(survey))

# study identifier, stored as a character string
survey$study <- as.character(2521)

# study year (year)
survey$year <- 1976

# geographic data (urban): text labels for the S13 codes
# NOTE(review): "Surburb" looks like a misspelling of "Suburb"; the label is
# kept unchanged because other code may match on it -- confirm before fixing
survey$urban <- dplyr::recode(
  survey$S13,
  !!!c(
    `1` = "Central City",
    `2` = "Surburb",
    `3` = "Town",
    `4` = "Rural"
  )
)

# geographic data (region): the eight S11 codes collapse into four regions
survey$region <- dplyr::recode(
  survey$S11,
  !!!c(
    `1` = "East",
    `2` = "East",
    `3` = "South",
    `4` = "South",
    `5` = "Midwest",
    `6` = "Midwest",
    `7` = "West",
    `8` = "West"
  )
)

# respondent's position in the household (hh), labelled from F1
# raw code counts first, labelled counts afterwards, as a quick check
table(survey$F1)
survey$hh <- as.character(
  dplyr::recode(
    survey$F1,
    !!!c(
      `1` = "Male head",
      `2` = "Female head (no male head)",
      `3` = "Wife",
      `4` = "Son",
      `5` = "Daughter",
      `6` = "Other"
    )
  )
)
table(survey$hh)

# increasing inequality (inequality), labelled from P2A_2
summary(survey$P2A_2)
# the codes are converted to character before recoding
survey$inequality <- as.character(
  dplyr::recode(
    as.character(survey$P2A_2),
    !!!c(
      `1` = "Feel",
      `2` = "Don't Feel",
      `3` = "Not Sure"
    )
  )
)
table(survey$inequality)

# inequality variable (inequality.variable): constant 1 for this study
survey$inequality.variable <- 1

# union (union.self): yes/no labels for the F6_1 indicator
table(survey$F6_1)
survey$union.self <- dplyr::recode(survey$F6_1,
                            `0` = "No",
                            `1` = "Yes")
table(survey$union.self)

# union (union.other): yes/no labels for the F6_2 indicator
summary(survey$F6_2)
survey$union.other <- dplyr::recode(survey$F6_2,
                             `0` = "No",
                             `1` = "Yes")
table(survey$union.other)

table(survey$F6_4) # not sure

# Rows flagged "not sure" in F6_4. %in% never returns NA, so a missing F6_4
# counts as "not flagged": the inspection below lists only flagged rows
# (an `== 1` mask would add all-NA rows for every missing F6_4), and the
# assignments no longer depend on NA handling in logical subscripts.
union_not_sure <- survey$F6_4 %in% 1
survey[union_not_sure, c("union.self", "union.other", "F6_3")]

# a "not sure" flag overrides both union variables
survey$union.self[union_not_sure] <- "Not Sure"
survey$union.other[union_not_sure] <- "Not Sure"

# employment (employed), labelled from F2A
table(survey$F2A)
survey$employed <- as.character(
  dplyr::recode(
    survey$F2A,
    !!!c(
      `1` = "Hourly wage worker",
      `2` = "Salaried",
      `3` = "Self-employed",
      `4` = "Retired",
      `5` = "Unemployed",
      `6` = "Student",
      `7` = "Military service",
      `8` = "Housewife",
      `9` = "Other (specify)"
    )
  )
)
table(survey$employed)

# employment, self (employed.self): set to NA for this study
survey$employed.self <- NA

# occupation, labelled from F2B
survey$occupation <- as.character(
  dplyr::recode(
    survey$F2B,
    !!!c(
      `1` = "Professional",
      `2` = "Manager, official, proprietor",
      `3` = "Clerical worker",
      `4` = "Sales worker",
      `5` = "Skilled craftsman, foreman",
      `6` = "Operative, unskilled laborer (except farm)",
      `7` = "Service worker",
      `8` = "Farmer, farm manager, farm laborer",
      `9` = "Other (specify)"
    )
  )
)
table(survey$occupation)

## occupation, self (occupation.self): set to NA for this study
survey$occupation.self <- NA

# household size (hhsize): set to NA for this study
survey$hhsize <- NA

# education (educ), labelled from F5
table(survey$F5)
survey$educ <- dplyr::recode(
  survey$F5,
  !!!c(
    `1` = "No formal schooling",
    `2` = "1-7 years completed",
    `3` = "8 years completed",
    `4` = "Some high school",
    `5` = "High school graduate",
    `6` = "Some college",
    `7` = "2-year college graduate",
    `8` = "4-year college graduate",
    `9` = "Post graduate"
  )
)
table(survey$educ)

# household income (income), labelled from F7
# NOTE(review): the bracket labels are formatted inconsistently ("$3000"
# without a comma, upper brackets without a leading "$"); they are kept
# verbatim -- confirm against the codebook before normalising
survey$income <- as.character(
  dplyr::recode(
    survey$F7,
    !!!c(
      `1` = "Under $3000",
      `2` = "$3,000 to $4,999",
      `3` = "$5,000 to $6,999",
      `4` = "$7,000 to $9,999",
      `5` = "10,000 to $14,999",
      `6` = "15,000 to $19,999",
      `7` = "20,000 to $24,999",
      `8` = "25,000 and over",
      `9` = "Not sure/refused"
    )
  )
)
table(survey$income)

# age bracket (age), labelled from F4C
survey$age <- as.character(
  dplyr::recode(
    survey$F4C,
    !!!c(
      `1` = "18 to 20",
      `2` = "21 to 24",
      `3` = "25 to 29",
      `4` = "30 to 34",
      `5` = "35 to 39",
      `6` = "40 to 49",
      `7` = "50 to 64",
      `8` = "65 and over",
      `9` = "Refused"
    )
  )
)
table(survey$age)


# race, labelled from F10
survey$race <- as.character(dplyr::recode(survey$F10,
                      `1` = "White",
                      `2` = "Black",
                      `3` = "Oriental",
                      `4` = "Spanish-American (Puerto Rican, Mexican-American)",
                      `5` = "Other (specify)",
                      `6` = "Not sure"))
# table() rather than summary(): summary() of a character vector reports only
# Length/Class/Mode, so it cannot show the recoded category counts
table(survey$race)

# politics (party), labelled from P3D
summary(survey$P3D)
survey$party <- as.character(
  dplyr::recode(
    survey$P3D,
    !!!c(
      `1` = "Republican",
      `2` = "Democrat",
      `3` = "Independent",
      `4` = "Other",
      `5` = "Not sure"
    )
  )
)
table(survey$party)

# politics (ideology), labelled from P3A
survey$ideology <- as.character(
  dplyr::recode(
    survey$P3A,
    !!!c(
      `1` = "Conservative",
      `2` = "Middle of the road",
      `3` = "Liberal",
      `4` = "Radical",
      `5` = "Not sure"
    )
  )
)
table(survey$ideology)

# gender, labelled from F11
survey$gender <- as.character(
  dplyr::recode(
    survey$F11,
    !!!c(
      `1` = "Male",
      `2` = "Female"
    )
  )
)
table(survey$gender)

# religion, labelled from F8
survey$religion <- as.character(
  dplyr::recode(
    survey$F8,
    !!!c(
      `1` = "Protestant",
      `2` = "Catholic",
      `3` = "Jewish",
      `4` = "Other (write in)",
      `5` = "None",
      `6` = "Not sure"
    )
  )
)
table(survey$religion)

## factuals: all three columns are set to NA for this study
survey$factual1 <- NA
survey$factual2 <- NA
survey$factual3 <- NA

## alienation index: P2A_1, P2A_3 and P2A_4 all share the same
## Feel / Don't Feel / Not Sure labelling
survey$dontcare <- dplyr::recode(
  survey$P2A_1,
  !!!c(`1` = "Feel", `2` = "Don't Feel", `3` = "Not Sure")
)
survey$dontcount <- dplyr::recode(
  survey$P2A_3,
  !!!c(`1` = "Feel", `2` = "Don't Feel", `3` = "Not Sure")
)
survey$leftout <- dplyr::recode(
  survey$P2A_4,
  !!!c(`1` = "Feel", `2` = "Don't Feel", `3` = "Not Sure")
)

## question place: constant label for this study
survey$question_place <- "before party"


# subset: keep only the columns built by this script, in this order
keep_cols <- c(
  "pid", "study", "year",
  "urban", "region", "hh",
  "inequality", "inequality.variable",
  "union.self", "union.other",
  "employed", "employed.self",
  "occupation", "occupation.self",
  "hhsize", "educ", "income",
  "age", "race", "party", "ideology", "gender", "religion",
  "factual1", "factual2", "factual3",
  "dontcare", "dontcount", "leftout",
  "question_place"
)
survey_2521 <- survey[, keep_cols]


# save file (left disabled)
#saveRDS(survey_2521, file = "Harris_Data/survey_2521.rds")
